import scrapy
from ebook.items import EbookItem
from ebook.items import EbookTypeItem


class SobooksSpider(scrapy.Spider):
    """Crawl sobooks.cc: category navigation -> book listings -> book pages.

    Emits one ``EbookTypeItem`` per navigation entry and one ``EbookItem``
    per book detail page that is visited.
    """

    name = 'sobooks'
    allowed_domains = ['sobooks.cc']
    start_urls = ['http://sobooks.cc/']

    def parse(self, response):
        """Yield a type item and a listing request for each navigation link.

        The XPath selects the anchor of every ``li`` under the element with
        class ``nav`` except the last ``li``.
        """
        nav_links = response.xpath('//*[@class="nav"]/li[position()<last()]/a')
        for nav_link in nav_links:
            type_item = EbookTypeItem()
            type_item['type_title'] = nav_link.xpath('./text()').get()
            yield type_item

            href = nav_link.xpath('./@href').get()
            if not href:
                # An anchor without an href cannot be followed; building a
                # Request from it would raise and abort the whole callback.
                continue
            yield scrapy.Request(
                # urljoin makes relative hrefs absolute; Request rejects
                # URLs that have no scheme.
                url=response.urljoin(href),
                callback=self.parse_book,
                meta={'type_title': type_item['type_title']},
            )

    def parse_book(self, response):
        """Yield a detail-page request for each selected book card.

        The ``position()<9`` predicate limits how many cards are followed
        (the first eight, counted as XPath evaluates ``position()``). The
        category title received through ``response.meta`` is passed on.
        """
        type_title = response.meta['type_title']
        cards = response.xpath('//*[@class="card col span_1_of_4"][position()<9]')
        for card in cards:
            href = card.xpath('.//h3/a/@href').get()
            if not href:
                # Skip cards without a title link instead of crashing.
                continue
            yield scrapy.Request(
                url=response.urljoin(href),
                callback=self.parse_bookinfo,
                meta={'type_title': type_title},
            )

    def parse_bookinfo(self, response):
        """Build and yield an ``EbookItem`` from a book detail page.

        Fields are read by position from the ``ul`` inside the element with
        class ``bookinfo``: ``li[1]`` title, ``li[2]`` author, ``li[4]`` tag
        links, ``li[5]`` ctime, ``li[7]`` ISBN. Any field whose node is
        missing is stored as ``None`` (tags become an empty string).
        """
        ebook_item = EbookItem()
        ebook_item['type_title'] = response.meta['type_title']
        ebook_item['ebook_cover'] = response.xpath('//*[@class="book-info"]//img/@src').get()

        info_list = response.xpath('//*[@class="bookinfo"]/ul')
        ebook_item['ebook_title'] = info_list.xpath('./li[1]/text()').get()
        ebook_item['ebook_author'] = info_list.xpath('./li[2]/text()').get()
        # All tag link texts, stored as one comma-separated string.
        ebook_item['ebook_tag'] = ",".join(info_list.xpath('./li[4]/a/text()').getall())
        ebook_item['ebook_ctime'] = info_list.xpath('./li[5]/text()').get()
        ebook_item['ebook_ISBN'] = info_list.xpath('./li[7]/text()').get()

        # Paragraphs that precede the article's second <h2>
        # (presumably the book introduction -- inferred from the field name).
        intro_paragraphs = response.xpath('//article//h2[2]/preceding-sibling::p')
        ebook_item['ebook_eintro'] = self._joined_text(intro_paragraphs)

        # Nodes after the second <h2>, cut off by subtracting the number of
        # nodes that follow the "e-secret" div; assuming that div is a later
        # sibling of the <h2>, this keeps the nodes between the two
        # (presumably the author introduction -- inferred from the field name).
        author_nodes = response.xpath('//article//h2[2]/following-sibling::node()[position()'
                                      '<count(//article//h2[2]/following-sibling::node())-'
                                      'count(//article//div[@class="e-secret"]/following-sibling::node())]')
        ebook_item['ebook_aintro'] = self._joined_text(author_nodes)
        yield ebook_item

    @staticmethod
    def _joined_text(nodes):
        """Return the ``string(.)`` value of each node, joined by newlines.

        ``.get()`` returns ``None`` when the sub-query yields nothing, which
        can happen for non-element nodes matched by ``node()``; such entries
        are dropped because ``str.join`` would raise ``TypeError`` on them.
        """
        texts = (node.xpath('string(.)').get() for node in nodes)
        return '\n'.join(text for text in texts if text is not None)
